import pandas as pd
import tabtools as tt
from pypinyin import lazy_pinyin
import re

def test_csv_gbk_to_utf():
    """Run ``tt.csv_gbk_to_utf`` on the hard-coded shidaoai CSV paths.

    Manual driver: the input is read from the ``_temp`` folder and the
    result is written to the ``_out`` folder. Nothing is returned or
    asserted.
    """
    # NOTE(review): presumably re-encodes a GBK CSV as UTF-8 -- confirm
    # against tabtools.
    source_csv = 'G:/_dataset/shidaoai/tab/_temp/shidaoai_gbk.csv'
    target_csv = 'G:/_dataset/shidaoai/tab/_out/shidaoai_utf.csv'
    tt.csv_gbk_to_utf(source_csv, target_csv)


def test_convert_excel_to_csv():
    """Export the ``Result`` sheet of the source workbook to two CSV files.

    Manual driver around ``tt.convert_excel_to_csv``: the first call uses
    the helper's default encoding and writes into ``_out``; the second
    passes ``encoding='gbk'`` and writes into ``_temp``. Nothing is
    returned or asserted.
    """
    workbook = 'G:/_dataset/shidaoai/tab/_orig/esophaguscancersurgeryalone.xlsx'
    sheet = 'Result'

    default_encoded_csv = 'G:/_dataset/shidaoai/tab/_out/features_utf.csv'
    gbk_encoded_csv = 'G:/_dataset/shidaoai/tab/_temp/features_gbk.csv'

    # Default-encoding export first, then the explicit GBK copy.
    tt.convert_excel_to_csv(workbook, default_encoded_csv, sheet)
    tt.convert_excel_to_csv(workbook, gbk_encoded_csv, sheet, encoding='gbk')


def sep():
    """Print three sample names after normalization, each wrapped in dashes.

    The first sample is a romanized name followed by marker characters;
    the other two are Chinese names (one with a trailing ``*``) that are
    romanized with ``lazy_pinyin``. The surrounding ``-`` characters make
    leading or trailing whitespace visible in the output.
    """
    romanized_sample = 'zhang ji an *^ ^^C+'
    starred_name = '张际安*'
    plain_name = '张建'

    # Take the text before the first '^', lowercase it, and keep only
    # a-z and spaces; then trim the whitespace left at the edges.
    leading_part = romanized_sample.split('^')[0].lower()
    kept_chars = re.findall('[a-z ]', leading_part)
    print('-' + ''.join(kept_chars).strip() + '-')

    # Drop anything from the first '*' onward, then join the pinyin
    # syllables with single spaces.
    for chinese_name in (starred_name, plain_name):
        syllables = lazy_pinyin(chinese_name.split('*')[0])
        print('-' + ' '.join(syllables).strip() + '-')

# Script entry point: run the name-normalization demo when executed directly.
if __name__ == '__main__':
    # test_convert_excel_to_csv()
    # The demo function in this file is named ``sep``; the previous call
    # to ``test_sep()`` referenced an undefined name and raised NameError.
    sep()

